In [2]:
import pandas as pd
In [9]:
df=pd.read_csv('C:\\Users\\saiph\\Desktop\\sem2\\ait580\\project\\EV.csv')
In [10]:
df
Out[10]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract
0 2C4RC1N71H Kitsap Bremerton WA 98311.0 2017 CHRYSLER PACIFICA Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 33 0 23.0 349437882 POINT (-122.6466274 47.6341188) PUGET SOUND ENERGY INC 5.303509e+10
1 2C4RC1N7XL Stevens Colville WA 99114.0 2020 CHRYSLER PACIFICA Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 32 0 7.0 154690532 POINT (-117.90431 48.547075) AVISTA CORP 5.306595e+10
2 KNDC3DLCXN Yakima Yakima WA 98908.0 2022 KIA EV6 Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 14.0 219969144 POINT (-120.6027202 46.5965625) PACIFICORP 5.307700e+10
3 5YJ3E1EA0J Kitsap Bainbridge Island WA 98110.0 2018 TESLA MODEL 3 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 215 0 23.0 476786887 POINT (-122.5235781 47.6293323) PUGET SOUND ENERGY INC 5.303509e+10
4 1N4AZ1CP7J Thurston Tumwater WA 98501.0 2018 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 151 0 35.0 201185253 POINT (-122.89692 47.043535) PUGET SOUND ENERGY INC 5.306701e+10
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
159462 KM8JBDA2XP Skamania Underwood WA 98651.0 2023 HYUNDAI TUCSON Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 33 0 14.0 235949514 POINT (-121.5312858 45.7348285) BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF S... 5.305995e+10
159463 1G1FZ6S02M Skagit Bow WA 98232.0 2021 CHEVROLET BOLT EV Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 40.0 148544168 POINT (-122.440636 48.5613885) PUGET SOUND ENERGY INC 5.305795e+10
159464 YV4H60CX2P King Sammamish WA 98029.0 2023 VOLVO XC90 Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 32 0 5.0 240200754 POINT (-121.9993659 47.5484866) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 5.303303e+10
159465 5YJ3E1EA7K Whatcom Bellingham WA 98225.0 2019 TESLA MODEL 3 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 220 0 40.0 156680590 POINT (-122.486115 48.761615) PUGET SOUND ENERGY INC||PUD NO 1 OF WHATCOM CO... 5.307300e+10
159466 7SAYGDEF6N Island Camano Island WA 98282.0 2022 TESLA MODEL Y Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 10.0 208285619 POINT (-122.5310901 48.2192797) BONNEVILLE POWER ADMINISTRATION||PUD 1 OF SNOH... 5.302997e+10

159467 rows × 17 columns

In [11]:
df.head(5)
Out[11]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract
0 2C4RC1N71H Kitsap Bremerton WA 98311.0 2017 CHRYSLER PACIFICA Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 33 0 23.0 349437882 POINT (-122.6466274 47.6341188) PUGET SOUND ENERGY INC 5.303509e+10
1 2C4RC1N7XL Stevens Colville WA 99114.0 2020 CHRYSLER PACIFICA Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 32 0 7.0 154690532 POINT (-117.90431 48.547075) AVISTA CORP 5.306595e+10
2 KNDC3DLCXN Yakima Yakima WA 98908.0 2022 KIA EV6 Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 14.0 219969144 POINT (-120.6027202 46.5965625) PACIFICORP 5.307700e+10
3 5YJ3E1EA0J Kitsap Bainbridge Island WA 98110.0 2018 TESLA MODEL 3 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 215 0 23.0 476786887 POINT (-122.5235781 47.6293323) PUGET SOUND ENERGY INC 5.303509e+10
4 1N4AZ1CP7J Thurston Tumwater WA 98501.0 2018 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 151 0 35.0 201185253 POINT (-122.89692 47.043535) PUGET SOUND ENERGY INC 5.306701e+10
In [12]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159467 entries, 0 to 159466
Data columns (total 17 columns):
 #   Column                                             Non-Null Count   Dtype  
---  ------                                             --------------   -----  
 0   VIN (1-10)                                         159467 non-null  object 
 1   County                                             159463 non-null  object 
 2   City                                               159463 non-null  object 
 3   State                                              159467 non-null  object 
 4   Postal Code                                        159463 non-null  float64
 5   Model Year                                         159467 non-null  int64  
 6   Make                                               159467 non-null  object 
 7   Model                                              159467 non-null  object 
 8   Electric Vehicle Type                              159467 non-null  object 
 9   Clean Alternative Fuel Vehicle (CAFV) Eligibility  159467 non-null  object 
 10  Electric Range                                     159467 non-null  int64  
 11  Base MSRP                                          159467 non-null  int64  
 12  Legislative District                               159106 non-null  float64
 13  DOL Vehicle ID                                     159467 non-null  int64  
 14  Vehicle Location                                   159458 non-null  object 
 15  Electric Utility                                   159463 non-null  object 
 16  2020 Census Tract                                  159463 non-null  float64
dtypes: float64(3), int64(4), object(10)
memory usage: 20.7+ MB
In [13]:
df.shape
Out[13]:
(159467, 17)
In [14]:
category=[i for i in df.columns if df[i].dtype=='object']
numerical=[i for i in df.columns if df[i].dtype!='object']
In [15]:
print(category)
['VIN (1-10)', 'County', 'City', 'State', 'Make', 'Model', 'Electric Vehicle Type', 'Clean Alternative Fuel Vehicle (CAFV) Eligibility', 'Vehicle Location', 'Electric Utility']
In [16]:
print(numerical)
['Postal Code', 'Model Year', 'Electric Range', 'Base MSRP', 'Legislative District', 'DOL Vehicle ID', '2020 Census Tract']

univarient analysis

In [17]:
df[numerical].describe()
Out[17]:
Postal Code Model Year Electric Range Base MSRP Legislative District DOL Vehicle ID 2020 Census Tract
count 159463.000000 159467.000000 159467.000000 159467.00000 159106.000000 1.594670e+05 1.594630e+05
mean 98170.373635 2020.192510 64.283319 1227.63716 29.261675 2.140242e+08 5.297287e+10
std 2453.354932 3.010564 94.634277 8930.03468 14.843878 7.959275e+07 1.621526e+09
min 1730.000000 1997.000000 0.000000 0.00000 1.000000 4.385000e+03 1.081042e+09
25% 98052.000000 2018.000000 0.000000 0.00000 18.000000 1.731016e+08 5.303301e+10
50% 98122.000000 2021.000000 14.000000 0.00000 33.000000 2.198450e+08 5.303303e+10
75% 98370.000000 2023.000000 84.000000 0.00000 43.000000 2.448363e+08 5.305307e+10
max 99577.000000 2024.000000 337.000000 845000.00000 49.000000 4.792548e+08 5.603300e+10
In [62]:
df[category].describe()
Out[62]:
VIN (1-10) County City State Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Vehicle Location Electric Utility
count 159467 159467 159463 159467 159467 159467 159467 159467 159458 159463
unique 9855 29 696 45 29 30 2 3 835 75
top 7SAYGDEE6P King Seattle WA TESLA MODEL Y Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... POINT (-122.12302 47.67668) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA)
freq 975 83413 26932 159106 72445 30841 124153 77195 4095 58884
In [67]:
# Multivariate Analysis

# Correlation Between Variables
correlation_matrix = df[numerical].corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()
In [69]:
# Regression Analysis
sns.lmplot(x='2020 Census Tract', y='Postal Code', data=df, height=6, aspect=1.5)
plt.title('Regression Analysis: Income Level vs EV Ownership')
plt.show()
C:\Users\saiph\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight
  self._figure.tight_layout(*args, **kwargs)
In [25]:
df.State.value_counts()
Out[25]:
State
WA    159106
CA        95
VA        37
MD        35
TX        22
NC        14
IL        13
CO        13
FL         9
HI         9
OR         9
AZ         9
NJ         8
CT         7
SC         7
NY         7
GA         7
NV         6
LA         6
MO         4
DC         3
MA         3
KY         3
PA         3
NE         3
AL         3
IN         2
BC         2
KS         2
ID         2
UT         2
OH         2
AR         2
AP         1
AE         1
OK         1
IA         1
DE         1
MI         1
WY         1
AK         1
MT         1
NH         1
MS         1
MN         1
Name: count, dtype: int64
In [26]:
df.Make.value_counts()
Out[26]:
Make
TESLA                   72445
NISSAN                  13795
CHEVROLET               12568
FORD                     8009
BMW                      6842
KIA                      6754
TOYOTA                   5535
VOLKSWAGEN               4424
VOLVO                    3746
JEEP                     3690
HYUNDAI                  3671
AUDI                     3246
RIVIAN                   2934
CHRYSLER                 2859
MERCEDES-BENZ            1189
PORSCHE                  1065
MITSUBISHI                911
HONDA                     833
MINI                      821
POLESTAR                  804
FIAT                      795
SUBARU                    690
SMART                     276
MAZDA                     269
LINCOLN                   236
JAGUAR                    220
LUCID                     208
LEXUS                     201
CADILLAC                  197
GENESIS                   130
LAND ROVER                 48
ALFA ROMEO                 19
FISKER                     17
AZURE DYNAMICS              8
TH!NK                       5
WHEEGO ELECTRIC CARS        3
DODGE                       2
BENTLEY                     2
Name: count, dtype: int64
In [27]:
df.Model.value_counts()
Out[27]:
Model
MODEL Y        30841
MODEL 3        28589
LEAF           13311
MODEL S         7650
BOLT EV         6112
               ...  
FLYING SPUR        1
918                1
OCEAN              1
S-10 PICKUP        1
BENTAYGA           1
Name: count, Length: 130, dtype: int64
In [28]:
df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts()
Out[28]:
Clean Alternative Fuel Vehicle (CAFV) Eligibility
Eligibility unknown as battery range has not been researched    77195
Clean Alternative Fuel Vehicle Eligible                         63824
Not eligible due to low battery range                           18448
Name: count, dtype: int64
In [29]:
df['Electric Vehicle Type'].value_counts()
Out[29]:
Electric Vehicle Type
Battery Electric Vehicle (BEV)            124153
Plug-in Hybrid Electric Vehicle (PHEV)     35314
Name: count, dtype: int64
In [30]:
df['Electric Range'].value_counts()
Out[30]:
Electric Range
0      77195
215     6359
220     4063
84      3965
25      3752
       ...  
95         3
74         3
11         2
59         1
57         1
Name: count, Length: 102, dtype: int64
In [36]:
#writing a function to count
def func(a):
    top_types = df[a].value_counts().nlargest(29).index
    df[a] = df[a].where(df[a].isin(top_types), 'Other')
    counts = df[a].value_counts()
    return counts
In [42]:
#calling a function to count county
countCounty=func('County')
In [46]:
import matplotlib.pyplot as plt
plt.figure(figsize=(20,8))
plt.bar(countCounty.index, countCounty.values)
plt.xlabel('County')
plt.ylabel('Count')
plt.title('Top 29 Types vs Other')
plt.xticks(rotation=45)
plt.show()
In [44]:
df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts().plot.pie()
Out[44]:
<Axes: ylabel='count'>
In [45]:
countModel=func('Model')
In [47]:
plt.figure(figsize=(20,8))
plt.bar(countModel.index, countModel.values)
plt.xlabel('Model')
plt.ylabel('Models')
plt.title('Top 29 Types vs Other City')
plt.xticks(rotation=45)
plt.show()
In [49]:
countMake=func('Make')
plt.figure(figsize=(20,8))
plt.bar(countMake.index, countMake.values)
plt.xlabel('Make')
plt.ylabel('Count')
plt.title('Top 29 Makes vs Other Makes')
plt.xticks(rotation=45)
plt.show()

Data Preparation¶

In [22]:
#number of duplicated rows
df.duplicated().sum()
Out[22]:
0
In [23]:
#missing values
df.isnull().sum()
Out[23]:
VIN (1-10)                                             0
County                                                 4
City                                                   4
State                                                  0
Postal Code                                            4
Model Year                                             0
Make                                                   0
Model                                                  0
Electric Vehicle Type                                  0
Clean Alternative Fuel Vehicle (CAFV) Eligibility      0
Electric Range                                         0
Base MSRP                                              0
Legislative District                                 361
DOL Vehicle ID                                         0
Vehicle Location                                       9
Electric Utility                                       4
2020 Census Tract                                      4
dtype: int64
In [24]:
#showing the coulmns having the missing values
df.columns[df.isnull().any()]
Out[24]:
Index(['County', 'City', 'Postal Code', 'Legislative District',
       'Vehicle Location', 'Electric Utility', '2020 Census Tract'],
      dtype='object')
In [52]:
import seaborn as sns
plt.figure(figsize=(15,10))
sns.histplot(df['Electric Vehicle Type'])
plt.show()
In [71]:
plt.figure(figsize=(15 , 10))
sns.kdeplot(df['Legislative District'] , fill =True , color="RED")
plt.title("Relation ship between Ownership and Legislative districts")
plt.show()
In [56]:
#Filtering out Washingtion States and calculating count of postal codes
washington_data = df[df['State']=='WA']['Postal Code'].value_counts().reset_index()
washington_data.columns = ['Postal Code','Count']
In [59]:
!pip install folium
Collecting folium
  Obtaining dependency information for folium from https://files.pythonhosted.org/packages/a2/1a/37c7ee1bc806d6c32621fecc72c19f6a9f9b4369e5e8f406a7c16d49f031/folium-0.15.0-py2.py3-none-any.whl.metadata
  Downloading folium-0.15.0-py2.py3-none-any.whl.metadata (3.4 kB)
Collecting branca>=0.6.0 (from folium)
  Obtaining dependency information for branca>=0.6.0 from https://files.pythonhosted.org/packages/2f/e7/603b136221de923055716d23e3047da71f92e0d8ba2c4517ce49a54fe768/branca-0.7.0-py3-none-any.whl.metadata
  Downloading branca-0.7.0-py3-none-any.whl.metadata (1.5 kB)
Requirement already satisfied: jinja2>=2.9 in c:\users\saiph\anaconda3\lib\site-packages (from folium) (3.1.2)
Requirement already satisfied: numpy in c:\users\saiph\anaconda3\lib\site-packages (from folium) (1.24.3)
Requirement already satisfied: requests in c:\users\saiph\anaconda3\lib\site-packages (from folium) (2.31.0)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\saiph\anaconda3\lib\site-packages (from jinja2>=2.9->folium) (2.1.1)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\saiph\anaconda3\lib\site-packages (from requests->folium) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\saiph\anaconda3\lib\site-packages (from requests->folium) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\saiph\anaconda3\lib\site-packages (from requests->folium) (1.26.16)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\saiph\anaconda3\lib\site-packages (from requests->folium) (2023.7.22)
Downloading folium-0.15.0-py2.py3-none-any.whl (100 kB)
   ---------------------------------------- 0.0/100.3 kB ? eta -:--:--
   ---- ----------------------------------- 10.2/100.3 kB ? eta -:--:--
   ----------------------- --------------- 61.4/100.3 kB 812.7 kB/s eta 0:00:01
   -------------------------------------- 100.3/100.3 kB 958.1 kB/s eta 0:00:00
Downloading branca-0.7.0-py3-none-any.whl (25 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.7.0 folium-0.15.0
In [60]:
#Choropleth map using Folium
import folium
map = folium.Map(location=[47.5976336,-122.2211712], zoom_start=10)
url = (
    "https://raw.githubusercontent.com/OpenDataDE/State-zip-code-GeoJSON/master/wa_washington_zip_codes_geo.min.json"
)

folium.Choropleth(
    geo_data= url,
    name="choropleth",
    data=washington_data,
    columns=["Postal Code", "Count"],
    key_on="properties.ZCTA5CE10",
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.3,
    legend_name="EV count",
).add_to(map)

folium.LayerControl().add_to(map)
Out[60]:
<folium.map.LayerControl at 0x167c72ed5d0>
In [61]:
map
Out[61]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]: